# Parse the single command-line argument: the 1-based position of the input
# file within the StateData directory listing (it is used as files[jj]).
args <- commandArgs(trailingOnly = TRUE)
if (length(args) != 1) {
  stop("Must give args: RScript.exe HPC.R --args 'id'")
}

# A non-numeric id would otherwise turn into NA and only fail much later with
# an unrelated-looking file error, so reject it up front.
jj <- suppressWarnings(as.numeric(args[1]))
if (is.na(jj) || jj < 1) {
  stop("'id' must be a positive number, got: ", args[1])
}

library(Rmpi)
library(doMPI)

# Worker count: size of MPI communicator 0 minus one
# (presumably the rank running this script is the one left out -- confirm).
n.cores <- mpi.comm.size(0) - 1

# Start the doMPI cluster with that many workers and register it as the
# backend for foreach's %dopar%.
cl <- startMPIcluster(n.cores, verbose = TRUE)
registerDoMPI(cl)

### The relative paths used below assume the working directory is the project
### base, laid out as:
### base
### -- StateData
### -- Backbones
### NOTE: results are written to "backbones/" (lower case) further down; on a
### case-sensitive file system the directory name has to match.
###
### Point the HPC_BASE_DIR environment variable at /base. When it is unset or
### empty the script stays in the directory it was launched from. (A bare
### setwd() with no argument is an error, so the path must come from
### somewhere.)
base.dir <- Sys.getenv("HPC_BASE_DIR")
if (nzchar(base.dir)) {
  setwd(base.dir)
}


library(data.table)
library(Matrix)
library(gplots)
library(pscl)

## Parallel tools
library(parallel)
library(foreach)
library(itertools)

## homemade package available in replication files
library(hurdleBackbone)



source("scripts.R")

# Pick this job's input file out of the StateData listing.
files <- dir("StateData")
party <- "A"
selec <- files[jj]

# Read the selected file, convert it to a plain data frame and discard rows
# that have no contributor id.
tmp.full.data <- as.data.frame(fread(paste0("StateData/", selec)))
has.contributor <- !is.na(tmp.full.data$Contributor.id)
tmp.full.data <- tmp.full.data[has.contributor, ]

# Keep the three columns used downstream and give them working names:
# Total_. -> Amount, Contributor.id -> ContributorEID,
# Candidate_Entity.id -> CandidateName.
tmp.data <- tmp.full.data[, c("Total_.", "Contributor.id", "Candidate_Entity.id")]
names(tmp.data) <- c("Amount", "ContributorEID", "CandidateName")
#### Need to find previous file ####
# The current file name is split on "-". Related files are those whose names
# match both the first piece and the fourth piece (the latter with everything
# from the first "." stripped), excluding the current file itself.
# NOTE(review): a disabled filter in the original compared a numeric piece of
# each name against file.base[2], apparently to keep only earlier files; as
# written, every matching file is used.
file.base <- strsplit(files[jj], "-")[[1]]
file.base[4] <- strsplit(file.base[4], "\\.")[[1]][1]
other.files <-
  files[grepl(file.base[1], files) &
          grepl(file.base[4], files) & files != files[jj]]

# Pool the contributor ids found in all related files. seq_along() keeps the
# loop from running (as 1, 0) when there are no related files.
ids.old <- numeric()
for (ii in seq_along(other.files)) {
  df.prev <- read.csv(paste0("StateData/", other.files[ii]))

  # Read the contributor column by its own name. Renaming the three columns
  # positionally depends on the column order inside each file and can label
  # the wrong column as the contributor id.
  if (!("Contributor.id" %in% names(df.prev))) {
    stop("Column 'Contributor.id' not found in StateData/", other.files[ii])
  }
  ids.tmp <- unique(df.prev[["Contributor.id"]])
  ids.old <- unique(c(ids.old, ids.tmp))
}

# Contributors of the current file that also occur in the related files.
ids <- unique(tmp.data$ContributorEID)
ids.keep <- ids[is.element(ids, ids.old)]
tmp.data <- tmp.data[is.element(tmp.data$ContributorEID, ids.keep), ]

# Of those, retain only contributors with more than one remaining record.
contrib.counts <- table(tmp.data$ContributorEID)
id.multiple <- names(which(contrib.counts > 1))
tmp.data <- tmp.data[is.element(tmp.data$ContributorEID, id.multiple), ]

# Empty contributor-by-candidate matrix: one row per distinct ContributorEID,
# one column per distinct CandidateName, in order of first appearance.
cand.names <- unique(tmp.data$CandidateName)
contrib.ids <- unique(tmp.data$ContributorEID)
ncol <- length(cand.names)
nrow <- length(contrib.ids)
mat <- Matrix(0, nrow = nrow, ncol = ncol)
rownames(mat) <- contrib.ids
colnames(mat) <- cand.names

# Dimnames read back from the matrix; used for lookups when filling cells.
r.mat <- rownames(mat)
c.mat <- colnames(mat)


# Fill the matrix one (contributor, candidate) pair at a time: take the pair
# in the first remaining row, store the value check() returns for it, then
# drop every row belonging to that pair until no rows are left.
# check() is not defined in this file (presumably it comes from scripts.R).
while (nrow(tmp.data) > 0) {
  cur.id <- tmp.data$ContributorEID[1]
  cur.cand <- tmp.data$CandidateName[1]

  mat[cur.id == r.mat, cur.cand == c.mat] <- check(cur.id, cur.cand, tmp.data)

  same.pair <-
    tmp.data$ContributorEID == cur.id & tmp.data$CandidateName == cur.cand
  tmp.data <- tmp.data[!same.pair, ]
}

# Negative cells are zeroed, then every cell is divided by 100.
mat[mat < 0] <- 0
mat <- mat / 100
#mat <- mat > 0

if (sum(rowSums(mat) > 1) != 0) {
  set.seed(711)

  # bipart is the transpose of mat restricted to rows/columns with a positive
  # total; mat's rows were named by ContributorEID and its columns by
  # CandidateName, so bipart has candidates as rows and contributors as
  # columns. drop = FALSE keeps a matrix even when a single row or column
  # survives the filter.
  bipart <- t(mat[rowSums(mat) > 0, , drop = FALSE])
  bipart <- bipart[rowSums(bipart) > 0, , drop = FALSE]
  ig.degree <- colSums(bipart)
  cand.degree <- rowSums(bipart)

  # Long-format table with one row per cell of bipart, in column-major order:
  #   column 1: the cell value
  #   column 2: the column total of that cell (ig.degree)
  #   column 3: the row total of that cell (cand.degree)
  # Filled with whole-vector operations rather than a cell-by-cell loop, which
  # also leaves the job index jj untouched.
  n.bip.row <- nrow(bipart)
  n.bip.col <- ncol(bipart)
  full.d <-
    matrix(NA_real_,
           nrow = n.bip.row * n.bip.col,
           ncol = 3)
  full.d[, 1] <- as.vector(as.matrix(bipart))
  full.d[, 2] <- rep(unname(ig.degree), each = n.bip.row)
  full.d[, 3] <- rep(unname(cand.degree), times = n.bip.col)

  # Label the columns (response Y, predictors X_1 and X_2) and round every
  # value down to a whole number.
  colnames(full.d) <- c("Y", "X_1", "X_2")
  tmp.df <- floor(as.data.frame(full.d))

  # Log-transform both predictors after shifting each by half of its smallest
  # non-zero value. (An earlier, disabled variant instead divided the
  # predictors by a power of ten before fitting.)
  half.min.x1 <- min(tmp.df$X_1[tmp.df$X_1 != 0]) / 2
  tmp.df$X_1 <- log(tmp.df$X_1 + half.min.x1)
  half.min.x2 <- min(tmp.df$X_2[tmp.df$X_2 != 0]) / 2
  tmp.df$X_2 <- log(tmp.df$X_2 + half.min.x2)

  # Hurdle model of Y on X_1, X_2 and their product, fitted with BFGS and
  # optimiser tracing switched on.
  mod <- hurdle(
    Y ~ X_1 + I(X_2) + I(X_1 * X_2),
    data = tmp.df,
    control = hurdle.control(method = "BFGS", trace = 1)
  )

  # n.sims is not referenced again in this file
  # (presumably read by code outside it -- confirm).
  n.sims <- 1000

  # Design matrix shared by both linear predictors:
  # intercept, X_1, X_2, X_1 * X_2.
  n.obs <- nrow(tmp.df)
  x1.x2 <- tmp.df$X_1 * tmp.df$X_2
  design <- matrix(
    c(rep(1, n.obs), tmp.df$X_1, tmp.df$X_2, x1.x2),
    nrow = n.obs,
    ncol = 4
  )

  # Linear predictor of the model's "zero" coefficients passed through
  # inverse.logit() (defined outside this file), and exponentiated linear
  # predictor of the "count" coefficients.
  pred.zero <- inverse.logit(design %*% mod$coefficients$zero)
  pred.count <- exp(design %*% mod$coefficients$count)

  # Fold both prediction vectors back into the shape of bipart.
  n.row <- nrow(bipart)
  n.col <- ncol(bipart)
  pred.zero <- matrix(pred.zero, nrow = n.row, ncol = n.col)
  pred.count <- matrix(pred.count, nrow = n.row, ncol = n.col)


  # pred.zero <- 1-array(pred.zero, dim=c(n.row, n.col, n.sims))
  # pred.count <- array(pred.count, dim=c(n.row, n.col, n.sims))

  # One-mode projection: cross-product of bipart with itself.
  uni.mode <- crossprod(bipart)

  # Only the strictly lower triangle is examined, so each off-diagonal pair
  # appears once; sim.ids holds the (row, col) positions of its non-zero
  # cells.
  tmp.uni.mode <- tril(uni.mode, k = -1)
  sim.ids <- which(tmp.uni.mode != 0, arr.ind = TRUE)

  # Split the positions into n.cores chunks and evaluate each chunk on a
  # worker with check_sims() (not defined in this file). Each chunk returns a
  # data frame with columns out, row, col; the chunks are row-bound in
  # whatever order they finish.
  # NOTE(review): .packages lists "backbone" while this file loads
  # "hurdleBackbone" -- confirm which package the workers need.
  par.out <-
    foreach(
      mm = isplitRows(sim.ids, chunks = n.cores),
      .combine = rbind,
      .packages = c("Matrix", "iterators", "itertools", "backbone"),
      .init = data.frame(
        "row" = numeric(),
        "col" = numeric(),
        "out" = numeric()
      ),
      .inorder = FALSE
    ) %dopar% {
      ind <-
        check_sims(mm, pred.count, pred.zero, p=c(.9, .95, .975, .99, .995), as.matrix(uni.mode))

      return(data.frame(
        out = ind,
        row = mm[,1],
        col = mm[,2]
      ))
    }

  # Scatter the per-pair results into a matrix shaped like uni.mode. The
  # position columns are selected by name, so the result does not depend on
  # the column order rbind() happens to produce for par.out.
  back.mat <- matrix(0, nrow = nrow(uni.mode), ncol = ncol(uni.mode))
  back.mat[as.matrix(par.out[, c("row", "col")])] <- par.out[["out"]]

  #sum(as.matrix(back.mat > (2+ (uni.mode > 0))))

  back.mat <- Matrix(back.mat)
  colnames(back.mat) <- colnames(uni.mode)
  rownames(back.mat) <- rownames(uni.mode)

  # Save under backbones/<input file name>.RData. The directory is created
  # when missing so a finished run is not lost at the final save().
  # NOTE(review): the layout comment near the top of the file spells the
  # directory "Backbones"; on a case-sensitive file system that is a
  # different directory -- confirm which spelling is intended.
  f.name <- paste0("backbones/", selec, ".RData")
  out.dir <- dirname(f.name)
  if (!dir.exists(out.dir)) {
    dir.create(out.dir, recursive = TRUE, showWarnings = FALSE)
  }
  save(back.mat, file = f.name)

}




# Shut down the doMPI cluster that was registered for %dopar%.
closeCluster(cl)
# NOTE(review): Rmpi documents mpi.quit() as finalising MPI and quitting R;
# if so, the q() call below is never reached -- confirm.
mpi.quit()
q(save="no")



